import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def scrawler(url, selector, flag):
    """Fetch *url* and extract content matched by a CSS *selector*.

    Args:
        url: Page to download.
        selector: CSS selector passed to BeautifulSoup ``select()``.
        flag: 0 -> return the text of each matched element;
              1 -> return the ``href`` attribute of each matched element.

    Returns:
        A list of strings (element texts or href values).

    Raises:
        ValueError: if *flag* is neither 0 nor 1 (previously this case
            silently returned ``None``, hiding caller mistakes).
        requests.HTTPError: if the server responds with a 4xx/5xx status.
    """
    headers = {
        # Desktop browser UA: some sites serve different (or no) markup
        # to clients that look like bots.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0'}
    # timeout= keeps the script from hanging forever on a dead server;
    # raise_for_status() surfaces HTTP errors instead of parsing an
    # error page as if it were the article list.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    bs = BeautifulSoup(r.text, 'html.parser')
    if flag == 0:
        return [item.get_text() for item in bs.select(selector)]
    if flag == 1:
        # item['href'] raises KeyError for an element without href,
        # matching the original attrs['href'] behaviour.
        return [item['href'] for item in bs.select(selector)]
    raise ValueError(f"flag must be 0 or 1, got {flag!r}")


# Journal home page (Acta Automatica Sinica).
url = "http://www.aas.net.cn/"

abstracts, authors = [], []
# Author lines from the "online first" article list on the home page.
authors.extend(scrawler(url, "#onlineFirst div.article-list-author", flag=0))
# Links to the individual article pages (may be relative or absolute).
articles_urls = scrawler(url, "#onlineFirst div.article-list-title a", flag=1)

for article_url in articles_urls:
    # urljoin resolves relative paths and leaves absolute hrefs intact,
    # unlike naive string concatenation, which produced '//' for hrefs
    # starting with '/' and broke entirely for absolute URLs.
    pageurl = urljoin(url, article_url)
    abstracts.extend(scrawler(pageurl, "div.article-abstract", flag=0))

# Strip newlines and tabs from each abstract.  Each abstract is already a
# string, so the original ''.join(abstract) was a no-op and is dropped;
# plain str.replace suffices here — no regex needed.
cleaned_abstracts = [abstract.replace('\n', '').replace('\t', '') for abstract in abstracts]

# Remove ALL whitespace inside each author string (including the gaps
# between names), collapsing it to nothing.
cleaned_authors = [re.sub(r'\s+', '', author) for author in authors]


# Show the first scraped record as a sanity check.  Guard against an
# empty scrape so the script prints nothing for a missing section
# instead of crashing with an IndexError.
if cleaned_authors:
    print("作者：")
    print(cleaned_authors[0])
if cleaned_abstracts:
    print("摘要：")
    print(cleaned_abstracts[0])